First of all load all packages needed for further computations.
library(dplyr)
library(ggplot2)
library(RColorBrewer)
library(xtable)
library(doBy)
library(scales)
library(gridExtra)
library(plotly)
1 Load the ATM data set and rename its columns for convenience in further plotting.
# Read the ATM data set from the course server; the first line of the file is
# treated as the column names.
# `header` is spelled out in full: the former `head=` only worked through
# partial argument matching.
ATM = read.table("http://www.trutschnig.net/Datensatz.txt", header = TRUE)

# Convert the `ymd` column to class Date.
ATM$ymd = as.Date(ATM$ymd)

# Shorter column names for the plots that follow: ymd -> date, sum_out -> cash.
names(ATM)[names(ATM) == "ymd"] = "date"
names(ATM)[names(ATM) == "sum_out"] = "cash"
1.a. Load the RTR data and, due to computational restrictions, continue with a random sample of it.
# Open a connection to the remote .RData file and load its objects into the
# workspace; the code below expects this to provide `RTR2015`.
address = url("http://www.trutschnig.net/RTR2015.RData")
load(address)
# Release the connection explicitly instead of leaving it to the garbage
# collector, which would report it as an unused connection.
close(address)

# Draw 2500 rows at random (without replacement) from the full data.
# NOTE(review): no seed is set, so every run yields a different sample.
RTR2015_sample = RTR2015[sample(nrow(RTR2015), 2500), ]
# RTR2015_4G_sample = RTR2015[ which(RTR2015$nw_cat=='4G'), ]
2 A simple ggplot example with the ATM data and alpha of 0.3.
# Cash over time, drawn as one semi-transparent line per weekday.
p = ggplot(ATM, aes(date, cash)) +
  geom_line(aes(group = weekday), alpha = 0.3)
p

# Four views of the same data arranged in two rows: the static ggplot, its
# ggplotly() conversion, a 2d-bin plot and a hexagonal-bin plot.
# NOTE(review): geom_hex() needs the hexbin package to be installed.
subplot(
  p, ggplotly(p),
  ggplot(ATM, aes(date, cash)) + geom_bin2d(),
  ggplot(ATM, aes(date, cash)) + geom_hex(),
  nrows = 2, shareX = TRUE, shareY = TRUE,
  # TRUE rather than T: T is an ordinary variable that can be reassigned,
  # and the line above already uses the full spelling.
  titleY = TRUE, titleX = TRUE
)
4EX: Modify the plot_ly() call so that hovering over data points also shows the holiday information.
# Group the ATM data by weekday.
ATMgrouped = ATM %>% group_by(weekday)

# plotly object in marker mode; the hover label is taken from `text` only and
# lists weekday, cash and holiday for each point.
p = ATMgrouped %>%
  plot_ly(
    x = ~date,
    y = ~cash,
    mode = 'markers',
    hoverinfo = 'text',
    text = ~paste(
      '</br> weekday: ', weekday,
      '</br> cash: ', cash,
      '</br> holiday: ', holiday
    )
  )
p
5 Plotly objects can be piped like any other data; let us pipe one to the add_lines() function.
# Pipe the plotly object into add_lines() to add a faint line trace named "cash".
p %>% add_lines(alpha = 0.2, name = ~"cash")
6 Nested add_lines() functions.
# Inner call: faint lines for all cash withdrawals.
# Outer call: a second line trace built from the Friday rows only.
add_lines(
  add_lines(p, name = "cash withdrawals", alpha = 0.2),
  data = filter(ATM, weekday == "Fri"),
  name = "Friday"
)
7 Different line types in a subplot.
# Two rows with a shared x axis: weekdays told apart by colour in the first
# panel and by line type in the second.
subplot(
  add_lines(p, color = ~weekday),
  add_lines(p, linetype = ~weekday),
  nrows = 2,
  shareX = TRUE
)
8EX: Simplify the preceding code for the plotly function subplot() by avoiding nested function calls, and deactivate the hover information. Think about why the hover information might be useful in this case.
8 We group the data by weekday and build a plotly line plot with the hover information switched off.
# One faint line per weekday, built as a single pipeline; hovering is switched
# off for this trace via hoverinfo = "none".
allWeekdays = ATM %>%
  group_by(weekday) %>%
  plot_ly(x = ~date, y = ~cash) %>%
  add_lines(
    name = "cash withdrawals",
    alpha = 0.2,
    hoverinfo = "none"
  )
allWeekdays
9EX: Highlight the series of cash withdrawals for Fridays within the last plot.
9 rangeslider() and highlighting cash withdrawals for Fridays.
# Keep only the Friday rows of the plot's data, draw them as an additional
# line trace named "Fri", and attach a range slider to the x axis.
allWeekdays %>%
  filter(weekday == "Fri") %>%
  add_lines(name = "Fri") %>%
  rangeslider()
EX10 Highlight Mondays in the last plot.
# Each add_fun() step filters the plot's data to one weekday and adds a line
# trace for it; a range slider is attached at the end.
allWeekdays %>%
  add_fun(function(plt) {
    plt %>%
      filter(weekday == "Fri") %>%
      add_lines(name = "Fri")
  }) %>%
  add_fun(function(plt) {
    plt %>%
      filter(weekday == "Mon") %>%
      add_lines(name = "Mon")
  }) %>%
  rangeslider()
EX11 Use the rangeslider() function but not by piping the plotly object to it.
# Call rangeslider() with the plot as an ordinary argument instead of piping.
rangeslider(allWeekdays)
12 regression methods applied to download speed and upload speed over all providers using ggplot() and ggplotly().
# Download speed against upload speed: faint magenta points, the default
# geom_smooth() fit in blue and a linear fit (method = "lm", no confidence
# band) in black.
p = ggplot(RTR2015_sample, aes(x = rtr_speed_ul, y = rtr_speed_dl)) +
  geom_point(alpha = 0.05, color = "magenta") +
  geom_smooth(color = "blue") +
  geom_smooth(method = "lm", se = FALSE, color = "black")

# ggplotly() has no `hoverinfo` parameter (it would be swallowed by `...`),
# so hovering is disabled on the converted traces with style() instead.
ggplotly(p) %>% style(hoverinfo = "none")
13 plotly_data() returns the data associated with a plotly visualization; it is useful for looking up what is in a plotly object, particularly after a series of data manipulations.
# Convert the ggplot, attach the computed (not the original) data of layer 2,
# and return that data.
# The former `hoverinfo = "none"` argument was dropped: it is not a ggplotly()
# parameter and has no bearing on the data returned here.
p %>%
  ggplotly(layerData = 2, originalData = FALSE) %>%
  plotly_data()
## # A tibble: 80 x 13
## x y ymin ymax se PANEL group colour fill size
## <dbl> <dbl> <dbl> <dbl> <dbl> <int> <int> <chr> <chr> <dbl>
## 1 4 547. -1543. 2637. 1066. 1 -1 blue grey60 1
## 2 616. 8572. 6758. 10386. 925. 1 -1 blue grey60 1
## 3 1229. 7355. 5735. 8976. 827. 1 -1 blue grey60 1
## 4 1841. 8750. 7094. 10406. 845. 1 -1 blue grey60 1
## 5 2453. 8328. 6923. 9733. 717. 1 -1 blue grey60 1
## 6 3066. 11464. 10257. 12671. 616. 1 -1 blue grey60 1
## 7 3678. 16249. 14680. 17818. 801. 1 -1 blue grey60 1
## 8 4290. 20270. 18447. 22093. 930. 1 -1 blue grey60 1
## 9 4903. 23696. 21988. 25405. 872. 1 -1 blue grey60 1
## 10 5515. 26723. 25106. 28341. 825. 1 -1 blue grey60 1
## # ... with 70 more rows, and 3 more variables: linetype <dbl>,
## # weight <dbl>, alpha <dbl>
14 Adding annotations to points of interest in the plot with add_segments() and add_annotations().
# Mark the rows of the layer-2 data with the largest and the smallest `se`:
# each gets a vertical segment from ymin to ymax plus a text annotation.
# The callback parameter is named `plt` so it does not shadow the global `p`.
p %>%
  ggplotly(layerData = 2, originalData = FALSE) %>%
  add_fun(function(plt) {
    plt %>%
      slice(which.max(se)) %>%
      add_segments(x = ~x, xend = ~x, y = ~ymin, yend = ~ymax) %>%
      add_annotations("Maximum uncertainty", ax = 60)
  }) %>%
  add_fun(function(plt) {
    plt %>%
      slice(which.min(se)) %>%
      add_segments(x = ~x, xend = ~x, y = ~ymin, yend = ~ymax) %>%
      add_annotations("Minimum uncertainty")
  })
15 Using subplot(), look at scatter plots with different colour transparencies set via the alpha argument.
# Three panels of upload speed against download speed: plotly defaults,
# markers with alpha = 0.2, and markers with alpha = 0.02.
subplot(
  plot_ly(
    RTR2015_sample,
    x = ~rtr_speed_dl, y = ~rtr_speed_ul,
    name = "default"
  ),
  plot_ly(RTR2015_sample, x = ~rtr_speed_dl, y = ~rtr_speed_ul) %>%
    add_markers(name = "alpha = 0.2", alpha = 0.2),
  plot_ly(RTR2015_sample, x = ~rtr_speed_dl, y = ~rtr_speed_ul) %>%
    add_markers(name = "alpha = 0.02", alpha = 0.02)
)
16 A simple 3D plot with plot_ly()
# Three-dimensional plot: download speed (x), upload speed (y), ping (z).
plot_ly(
  RTR2015_sample,
  x = ~rtr_speed_dl,
  y = ~rtr_speed_ul,
  z = ~rtr_ping,
  name = "default"
)
17EX Create a plotly object and distinguish the scatter points by operator name (op_name), using different colours for the x/y/z triples. You might use the methods from before to further alleviate the overplotting issue within the add_*() function.
17 add_markers() function for differentiating the data points with categorical information.
# 3D markers coloured by operator name, drawn semi-transparent (alpha = 0.2).
plot_ly(
  RTR2015_sample,
  x = ~rtr_speed_dl,
  y = ~rtr_speed_ul,
  z = ~rtr_ping
) %>%
  add_markers(color = ~op_name, alpha = 0.2)
18 The same plot from before in 2 dimensions.
# 2D markers (download against upload speed) coloured by operator name,
# drawn semi-transparent (alpha = 0.2).
plot_ly(
  RTR2015_sample,
  x = ~rtr_speed_dl,
  y = ~rtr_speed_ul
) %>%
  add_markers(color = ~op_name, alpha = 0.2)
19 Going deeper into distributions of download and upload speed.
# Download (x) and upload (y) speeds of the sample as plain vectors.
x = RTR2015_sample$rtr_speed_dl
y = RTR2015_sample$rtr_speed_ul

# 2 x 2 layout: trace of x on top, empty corner, markers coloured by operator
# in the large bottom-left cell, trace of y on the right. Axes are shared
# across panels and axis titles are suppressed.
s = subplot(
  plot_ly(RTR2015_sample, x = x, color = I("green")) %>%
    add_trace(x = x, name = 'download speed'),
  plotly_empty(),
  plot_ly(RTR2015_sample, x = x, y = y, color = I("blue")) %>%
    add_markers(alpha = 0.2, color = ~op_name),
  plot_ly(y = y, color = I("blue")) %>%
    add_trace(y = y, name = 'upload speed'),
  nrows = 2,
  heights = c(0.2, 0.8),
  widths = c(0.8, 0.2),
  shareX = TRUE,
  shareY = TRUE,
  titleX = FALSE,
  titleY = FALSE
)
layout(s, showlegend = TRUE)
EX20 Create a dendrogram of a sample of 2000 for the RTR2015 dataset only for 4G. What is the difference in the interpretation: do the operators treat the relation of upload to download speed differently by technology? It is about the distribution!
# Restrict the full data to 4G measurements and sample from those rows.
# The filter used to be commented out, so this block merely redrew the
# previous plot on the all-technology sample.
RTR2015_4G = RTR2015[which(RTR2015$nw_cat == "4G"), ]
# At most 2000 rows; min() guards against fewer 4G rows being available.
RTR2015_4G_sample = RTR2015_4G[
  sample(nrow(RTR2015_4G), min(2000, nrow(RTR2015_4G))),
]

# Download (x) and upload (y) speeds of the 4G sample as plain vectors.
x = RTR2015_4G_sample$rtr_speed_dl
y = RTR2015_4G_sample$rtr_speed_ul

# 2 x 2 layout: trace of x on top, empty corner, markers coloured by operator
# in the large bottom-left cell, trace of y on the right. Axes are shared
# across panels and axis titles are suppressed.
s = subplot(
  plot_ly(RTR2015_4G_sample, x = x, color = I("green")) %>%
    add_trace(x = x, name = 'download speed'),
  plotly_empty(),
  plot_ly(RTR2015_4G_sample, x = x, y = y, color = I("blue")) %>%
    add_markers(alpha = 0.2, color = ~op_name),
  plot_ly(y = y, color = I("blue")) %>%
    add_trace(y = y, name = 'upload speed'),
  nrows = 2, heights = c(0.2, 0.8), widths = c(0.8, 0.2),
  shareX = TRUE, shareY = TRUE, titleX = FALSE, titleY = FALSE
)
layout(s, showlegend = TRUE)
21 Mosaic plot using the package ggmosaic.
library(ggmosaic)
# Mosaic of operator by LTE capability, filled by network category, then
# converted to an interactive plotly figure.
p = ggplot(data = RTR2015_sample) +
  geom_mosaic(
    aes(
      x = product(op_name, device_has_lte),
      fill = factor(nw_cat)
    ),
    divider = ddecker(),
    offset = 0.05
  ) +
  labs(
    x = "operator and LTE",
    y = "proportion",
    title = 'Mosaicplot'
  ) +
  guides(
    fill = guide_legend(title = "technology", reverse = TRUE)
  )
ggplotly(p)